package com.spider.filter;

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.model.Song;
import com.spider.util.UrlUtil;

/**
 * Collects songs for a keyword by running a Baidu web search restricted to
 * {@code music.baidu.com} and scanning each result page for a
 * {@code data_url="..."} attribute.
 */
public class BaiduMusic {

	/** ASCII punctuation and whitespace removed from the keyword before searching. */
	private static final Pattern STRIP_PATTERN = Pattern
			.compile("[\\p{Punct}\\p{Space}]");

	/** Captures the Baidu redirect link of each search result. */
	private static final Pattern RESULT_LINK_PATTERN = Pattern
			.compile("href=\"(http://www.baidu.com/link\\?url=.*?)\"");

	/**
	 * Captures the value of the first {@code data_url} attribute. The
	 * quantifier is reluctant so the capture ends at the attribute's own
	 * closing quote rather than at the last quote on the line.
	 */
	private static final Pattern DATA_URL_PATTERN = Pattern
			.compile("data_url=\"(.*?)\"");

	private static final String SEARCH_PREFIX = "http://www.baidu.com/s?wd=";

	/** Already percent-encoded {@code site:music.baidu.com} search restriction. */
	private static final String SITE_FILTER = "+site%3Amusic.baidu.com";

	/**
	 * Searches Baidu for {@code input} and returns one {@link Song} per result
	 * page that contains a {@code data_url} attribute.
	 *
	 * @param input
	 *            search keyword; ASCII punctuation and whitespace are stripped
	 *            before it is used. May be {@code null}.
	 * @return the songs found, in search-result order; an empty list (never
	 *         {@code null}) when {@code input} is {@code null}, contains
	 *         nothing but punctuation/whitespace, or no page matched
	 */
	public List<Song> getEffectiveUrls(String input) {

		List<Song> songs = new ArrayList<Song>();
		if (input == null) {
			return songs;
		}
		String keyword = STRIP_PATTERN.matcher(input).replaceAll("");
		if (keyword.length() == 0) {
			// Nothing left to search for; skip the network round trip.
			return songs;
		}

		// The rest of the URL is already percent-encoded (see SITE_FILTER), so
		// the keyword must be encoded as well or non-ASCII titles yield an
		// invalid URL.
		String urlString = SEARCH_PREFIX + encodeQuery(keyword) + SITE_FILTER;

		// NOTE(review): UrlUtil.getText is not visible here; presumably it
		// returns the page body. Guard against null in case it signals
		// failure that way.
		String html = UrlUtil.getText(urlString);
		if (html != null) {
			Matcher linkMatcher = RESULT_LINK_PATTERN.matcher(html);
			while (linkMatcher.find()) {
				String pageHtml = UrlUtil.getText(linkMatcher.group(1));
				if (pageHtml == null) {
					continue;
				}
				Matcher dataMatcher = DATA_URL_PATTERN.matcher(pageHtml);
				// Only the first data_url on each result page is used.
				if (dataMatcher.find()) {
					Song song = new Song();
					song.setData(dataMatcher.group(1));
					songs.add(song);
				}
			}
		}
		// Console trace kept from the original implementation.
		System.out.println(songs);
		return songs;

	}

	/** Percent-encodes a keyword as UTF-8 for use in a URL query string. */
	private static String encodeQuery(String keyword) {
		try {
			return URLEncoder.encode(keyword, "UTF-8");
		} catch (UnsupportedEncodingException e) {
			// UTF-8 support is mandatory on every Java platform.
			throw new IllegalStateException("UTF-8 encoding is not available", e);
		}
	}
}
